* This do-file reads in processed datasets on firms from Credit Bureau and creates stacked datasets for running event study analyses.
* Authors: Ellora Derenoncourt & Monica Essig Aberg

* ------------------------------------------------------------------------------
*1. Stacked policy firms dataset (for own effects)
* ------------------------------------------------------------------------------
	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	

	* Load event info
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6
	
	levelsof eventid, local(exp)
	* Build panel for each event 
	foreach experiment in `exp' {
		
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy  {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"' 
		
		* Load cz-firm-month level data
		use "$data/cb/clean_policy_firm_cz.dta", clear
		
		* Restrict to company
		keep if cmp==`cmp_company_code'
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen mw=`mw'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'

		* Create event time variable
		gen etime=mdate-`mdate'-1
		 
		* Restrict to months close to event
		local event_start=-12
		local event_end=11
		keep if inrange(etime, `event_start',`event_end')
		
		* Restrict to CZ cells with at least 30 *actual* emp during sample period
		bysort cz: egen min_emp = min(tot_emp)
		drop if min_emp < 30 

		* Generate max emp by CZ
		bysort cz: egen max_emp = max(tot_emp)
		*drop if min_emp < 30 
		
		* NEW: Create indicators for fully balanced
		foreach num of numlist 1/12 {
			gen e_m`num'_temp = etime == -`num'
			by cz: egen e_m`num' = max(e_m`num'_temp)
		}
		foreach num of numlist 0/11 {
			gen e_`num'_temp = etime == `num'
			by cz: egen e_`num' = max(e_`num'_temp)
		}
		gen balanced_short = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5)
		gen balanced_long = min(e_m12, e_m11, e_m10, e_m9, e_m8, e_m7, e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11)
		drop e_*

		* Generate outcomes equal to share of emp paid below, at and above minimum wage
		gen share_wage_ltmw=share_wage_lt`mw'
		gen share_wage_mw=share_wage_`mw' //  
		gen share_wage_gtmw=share_wage_gt`mw' //  
		local mw_plus_one=`mw'+1
		local mw_plus_two=`mw'+2
		local mw_plus_three=`mw'+3
		gen share_retain_mw1=share_retain_lt`mw_plus_one'
		gen share_retain_mw2=share_retain_lt`mw_plus_two'
		gen share_retain_mw3=share_retain_lt`mw_plus_three'
		
		* Merge in avg gap calculations for the policy firm
		merge m:1 cz using "$data/cb/all_gap_measures_`experiment'.dta", nogen keepusing(T wage_bill* super* employment_share* avgplacebogap gap_decile) keep (3)

		tempfile experiment`experiment'
		save `experiment`experiment''

	 }
	 
	 clear

	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
	 
	 * Label variables
	 label variable trt_exp "experiment code"
	 label variable mw "new minimum wage"
	 label variable share_affected "share affected by policy"
	 label variable months_since_last_policy "months since last policy at company"
	 label variable months_until_next_policy "months until next policy at company"
	 label variable balanced_short "fully balanced -6 to 5 months from policy month"
	 label variable balanced_long "fully balanced -12 to 11 months from policy month"
	 label variable etime "months since event"
	 label variable share_wage_ltmw "share with wages less than minimum wage"
	 label variable share_wage_mw "share with wage in minimum wage bin"
	 label variable share_wage_gtmw "share with wage at least minimum wage"
	 label variable T "average gap"
	 label variable avgplacebogap "average placebo gap"

	save "$data/cb/stacked_policy_firm_dataset.dta", replace	
	
	*  Stripped data for spillover estimates
	use cz trt_exp etime ln_avg_wage_exact using "$data/cb/stacked_policy_firm_dataset.dta", clear
	rename ln_avg_wage_exact ln_wage_policy
	save "$data/cb/stacked_policy_firm_wages_dataset.dta", replace
	
* ------------------------------------------------------------------------------
*1. Stacked policy firms dataset for imputation checks
* ------------------------------------------------------------------------------

* Load data --------------------------------------------------------------------
 
* Identify outliers in currentmonthearn
foreach company in "11111" "55555" "44444" "22222" "33333" {
	
		* Load data
		if "`company'"=="22222"{
			insheet using "$data/cb/raw/[Project Name]_result1_2_czone_22222_part1.csv", clear 
			tempfile part1
			save `part1'
			
			insheet using "$data/cb/raw/[Project Name]_result1_2_czone_22222_part2.csv", clear 
			append using `part1'		
		}
		else {
			insheet using "$data/cb/raw/[Project Name]_result1_2_czone_`company'.csv", clear 
		}

		* Create monthly date and year variables 
		gen str7 str_monthyear = substr(string(archive), 1, 4) + "-" + substr(string(archive), 5, 2) + "-01"
		gen mdate = monthly(str_monthyear, "YM")
		format mdate %tm
		gen month=month(dofm(mdate))
		gen year=yofd(dofm(mdate))
		la var year "Year"
		
		* Number emp with matched monthly pay, wage < 30
		egen tot_freq_currentmonthearn = sum(freq_currentmonthearn), by(archive cz)

		* Collapse to cz-month -------------------------------------------------
		
		collapse (mean) tot_freq_currentmonthearn, by(mdate year month cz)

		* Correct for low reporting/other anomolies ----------------------------
		
		* Flag low-reporting months for matched monthly variables (currentmonthearn)
		preserve
			collapse (sum) tot_freq_currentmonthearn, by(mdate)
			tsset mdate
			tssmooth ma tot_freq_currentmonthearn_ma = tot_freq_currentmonthearn, window(1 0 1)
			gen outlier_curr = tot_freq_currentmonthearn < tot_freq_currentmonthearn_ma*.7

			tempfile outliers_curr
			save `outliers_curr', replace
		restore
		
		merge m:1 mdate using `outliers_curr'
		
		if "`company'" == "11111" {
			replace outlier_curr = 1 if mdate == tm(2022m1) | mdate == tm(2014m1) | mdate == tm(2017m4) | mdate == tm(2021m2) | mdate == tm(2013m1)
		}
		if "`company'" == "55555" {
			replace outlier_curr = 1 if mdate == tm(2018m6) | mdate == tm(2014m1) | mdate == tm(2019m1) | mdate == tm(2020m1)
		}
		if "`company'" == "44444" {
			replace outlier_curr = 1 if mdate == tm(2013m1)
		}
		if "`company'" == "22222" {
			replace outlier_curr = 1 if mdate == tm(2016m11)
		}
		if "`company'" == "33333" {
			replace outlier_curr = 1 if mdate == tm(2014m1) | mdate == tm(2013m1)
		}
			
		
		* Save
		keep cz mdate outlier_curr
		gen cmp_company_code = `company'
		tempfile clean_monthly_`company'
		save `clean_monthly_`company''

}
	* Append
	clear
	foreach company in "11111" "55555" "44444" "22222" "33333"{
		append using `clean_monthly_`company''
	}	

	
* Merge with stacked data
	rename czone cz
	merge 1:m mdate cz cmp using "$data/cb/stacked_policy_firm_dataset.dta", nogen keep(2 3)
	
* Merge with new retention variable
	drop outlier_retain
	merge m:1 mdate cz cmp using "$data/cb/adj_retain", nogen keep(1 3) keepusing(outlier_retain)
	
	save "$data/cb/stacked_policy_firm_imputation_check.dta", replace
	
* ------------------------------------------------------------------------------
*2. Stacked policy firms dataset (for spillovers from other policy firms)
* ------------------------------------------------------------------------------
	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	
	
	* Select events
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6
	
	* Load experiment details
	levelsof eventid, local(exp)
	foreach experiment in `exp' {	
		
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		clear
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"'
		
		* Load policy firm data
		use "$data/cb/clean_policy_firm_cz.dta", clear
		
		* Get rid of policy firm from this experiment
		drop if cmp_company_code == `cmp_company_code'
		
		* Merge in avg gap calculations for the policy firm
		merge m:1 cz using "$data/cb/all_gap_measures_`experiment'.dta", nogen keepusing(T wage_bill* employment_share* super* avgplacebogap) keep (3)
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'
		gen etime=mdate-`mdate'-1
		gen mw=`mw'
		
		* Restrict to within a year of event
		local event_start = -12
		local event_end = 11
		keep if inrange(etime, `event_start',`event_end')
		
		* Create balance indicators
		foreach num of numlist 1/12 {
			gen e_m`num'_temp = etime == -`num'
			bysort cz cmp: egen e_m`num' = max(e_m`num'_temp)
		}
		foreach num of numlist 0/11 {
			gen e_`num'_temp = etime == `num'
			bysort cz cmp: egen e_`num' = max(e_`num'_temp)
		}
		gen balanced_short = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5)
		gen balanced_long = min(e_m12, e_m11, e_m10, e_m9, e_m8, e_m7, e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11)
		drop e_*
		
		* Generate outcomes equal to share of workers paid below, at and above minimum wage
		gen share_wage_ltmw=share_wage_lt`mw'
		gen share_wage_mw=wage_`mw'/tot_emp
		gen share_wage_gtmw=share_wage_gt`mw'
		
		* Save
		tempfile experiment`experiment'
		save `experiment`experiment''
		
		clear
	}
	
	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
	 	 
	 * Label variables
	 label variable cmp_company_code "Non-policy company code"
	 label variable trt_exp "experiment code"
	 label variable mw "new policy minimum wage"
	 label variable balanced_short "fully balanced -6 to 5 months from policy month"
	 label variable balanced_long "fully balanced -12 to 11 months from policy month"
	 label variable etime "months since event"
	 label variable share_wage_ltmw "share with wages less than minimum wage"
	 label variable share_wage_mw "share with wage in minimum wage bin"
	 label variable share_wage_gtmw "share with wage at least minimum wage"
	 label variable T "average gap"
	 label variable share_affected "share affected by policy"
	 label variable months_since_last_policy "months since last policy at company"
	 label variable months_until_next_policy "months until next policy at company"
	
	* Clean
	keep cmp_company_code cz mdate month year  avg* ln_* d_* share_wage_ltmw share_wage_gtmw share_wage_mw outlier* mw T trt_exp mw share_affected months_since months_until etime balanced* share_wage_ltmw share_wage_mw share_wage_gtmw super* wage_bill* employment_share*
	 
	 save "$data/cb/stacked_policy_firm_dataset_spillovers.dta", replace
 
* ------------------------------------------------------------------------------
*3. CZ-level dataset of wages for policy and non-policy
* ------------------------------------------------------------------------------
	use "$data/cb/clean_policy_firm_cz.dta", clear
	gen policy_firm=1
	append using "$data/cb/clean_nonpolicy_firm_cz.dta"
	replace policy_firm=0 if policy_firm==.
	replace insample = 1 if policy_firm == 1
	replace spell_id = 1 if policy_firm == 1
	tempfile fulldata
	save `fulldata'

	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	

	* Load event info
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6
	
	levelsof eventid, local(exp)
	* Build panel for each event 
	foreach experiment in `exp' {
	
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"' 
	
	use `fulldata', clear
	* Create event time variable
	gen etime=mdate-`mdate'-1
	 
	local event_start=-12
	local event_end=11
	keep if inrange(etime, `event_start',`event_end')

	gen trt_exp= `experiment'
	gen mw=`mw'
	gen share_affected=`share_affected'
	gen months_since_last_policy=`months_since_last_policy'
	gen months_until_next_policy=`months_until_next_policy'
	
	* Keep CZs where policy company has at least 30 employees at all times during sample period
	bysort cz: egen temp = min(tot_emp) if cmp == `cmp_company_code'
	bysort cz: egen min_emp_cz = min(temp)
	drop temp
	drop if min_emp_cz < 30
	
	* Keep in-sample establishment-spell pairs that with at least 10 employees each month
	keep if insample == 1
	bysort cmp_company_code cz spell_id mdate: egen est_emp = sum(tot_emp)
	foreach num of numlist 1/12 {
		gen e_m`num'_temp = etime == -`num' & est_emp >= 10
		bysort cz cmp spell_id: egen e_m`num' = max(e_m`num'_temp)
	}
	foreach num of numlist 0/11 {
		gen e_`num'_temp = etime == `num' & est_emp >= 10
		bysort cz cmp spell_id: egen e_`num' = max(e_`num'_temp)
	}
	gen balanced_long = min(e_m12, e_m11, e_m10, e_m9, e_m8, e_m7, e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11)
	gen balanced_short = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5)
	keep if balanced_short == 1 | cmp == `cmp_company_code'
	drop e_*
	
	* Create gap measure for policy company only
	preserve
		
		keep if cmp==`cmp_company_code'
			
		* Specify start and end dates for gap measure
		local gap_measure_start=-6
		di `gap_measure_start'
		local gap_measure_end=`gap_measure_start'+3
		di `gap_measure_end'
		
		* For wage bins below the minimum wage, the gap is the number of emp per bin times the value of the bin
		forval i=7(1)`bmw'{ 
			local n = `mw'-`i'
			gen gap_`n'=(wage_`i')*`n'
		}

		* For wage bins at or above the minimum wage, the gap is set to 0
		forval i=`mw'(1)29{ 
			gen gap_`i'=0
		}

		* The denominator for each bin is the number of emp per bin times the value of the bin
		forval i=7(1)29{ 
			gen sumwage_`i'=wage_`i'*`i'
		}

		* Gap numerator is sum of gap per bin
		egen sum_gap=rowtotal(gap*)

		* Gap denominator is sum of denominator per bin
		egen sum_wage=rowtotal(sumwage*)

		* Final gap measure is numerator divided by denominator
		gen gap=sum_gap/sum_wage if inrange(etime, `gap_measure_start', `gap_measure_end')	
		qui egen avggap=mean(gap), by(cz) 

		rename avggap T
		sum T,d
		
		* Create placebo gap measure	
		* Specify start and end dates for gap measure
		local placebo_gap_start=-12
		di `placebo_gap_start'
		local placebo_gap_end=`placebo_gap_start'+3
		di `placebo_gap_end'

		* Final gap measure is numerator divided by denominator
		gen placebo_gap=sum_gap/sum_wage if inrange(etime,`placebo_gap_start',`placebo_gap_end') 		
		qui egen avgplacebogap=mean(placebo_gap), by(cz) 	
		
		* Save T to merge with nonpolicy stacked dataset
		contract T avgplacebogap cz
		tempfile company_gap
		save `company_gap'
		
	restore 
	
	* Create super gap and wage bill share measure	
	* Specify start and end dates for gap and share measures
	local gap_measure_start=-6
	di `gap_measure_start'
	local gap_measure_end=`gap_measure_start'+3
	di `gap_measure_end'

	* For wage bins below the minimum wage, the gap is the number of workers per bin times the value of the bin, only at the policy company
	forval i=7(1)`bmw'{ 
		local n = `mw'-`i'
		gen gap_`n'=(wage_`i')*`n'
		replace gap_`n'=0 if cmp_company_code!=`cmp_company_code'
	}

	* For wage bins at or above the minimum wage, the gap is set to 0
	forval i=`mw'(1)29{ 
		gen gap_`i'=0
	}
	
	* For the wage bill share, the numerator is the total wage bill at the policy company
	forval i=7(1)29{ 
		gen wage_bill_share_`i'=(wage_`i')*`i'
		replace wage_bill_share_`i'=0 if cmp_company_code!=`cmp_company_code'
	}

	* The denominator is the total wage bill - sum of number of workers per bin times the value of the bin
	forval i=7(1)29{ 
		gen sumwage_`i'=wage_`i'*`i'
		gen sumwagepol_`i'=wage_`i'*`i'
		gen sumwagenp_`i'=wage_`i'*`i'
		gen sumwagelg_`i'=wage_`i'*`i'
		gen sumwagenplg_`i'=wage_`i'*`i'
		replace sumwagepol_`i'= 0 if cmp != `cmp_company_code'
		replace sumwagenp_`i'= 0 if cmp == `cmp_company_code'
		replace sumwagelg_`i'= 0 if balanced_long == 0
		replace sumwagenplg_`i'= 0 if cmp == `cmp_company_code' | balanced_long == 0
	}

	* For the employment share, the numerator is the total employment at the policy company
	forval i=7(1)29{ 
		gen employment_share_`i'=(wage_`i')
		replace employment_share_`i'=0 if cmp_company_code!=`cmp_company_code'
	}

	* The denominator is total employment - sum of number of workers per bin
	forval i=7(1)29{ 
		gen sumemp_`i'=wage_`i'
		gen sumemppol_`i' = wage_`i'
		gen sumempnp_`i' = wage_`i'
		gen sumemplg_`i'=wage_`i'
		gen sumempnplg_`i' = wage_`i'
		replace sumemppol_`i' = 0 if cmp != `cmp_company_code'
		replace sumempnp_`i' = 0 if cmp == `cmp_company_code'
		replace sumemplg_`i' = 0 if balanced_long == 0
		replace sumempnplg_`i' = 0 if cmp == `cmp_company_code' | balanced_long == 0
	}
	
	* Gap numerator is sum of gap per bin
	egen sum_gap=rowtotal(gap_*)
	
	* Wage bill share numerator is sum of wage bill
	egen sum_wage_bill_share=rowtotal(wage_bill_share*)

	* Employment share numerator is sum of employment share
	egen sum_employment_share=rowtotal(employment_share*)
	
	* Denominator for gap and wage bill share is sum of wage bill per bin (numerator for avg. wage)
	egen sum_wage=rowtotal(sumwage_*)
	egen sum_wage_pol=rowtotal(sumwagepol_*)
	egen sum_wage_np=rowtotal(sumwagenp_*)
	egen sum_wage_lg=rowtotal(sumwagelg_*)
	egen sum_wage_np_lg=rowtotal(sumwagenplg_*)

	* Denominator for employment is sum of workers per bin
	egen sum_emp=rowtotal(sumemp_*)
	egen sum_emp_pol=rowtotal(sumemppol_*)
	egen sum_emp_np=rowtotal(sumempnp_*)
	egen sum_emp_lg=rowtotal(sumemplg_*)
	egen sum_emp_np_lg=rowtotal(sumempnplg_*)

	* Collapse to CZ-month level
	collapse (sum) sum_gap* sum_wage* sum_emp*, by(cz mdate etime)

	* Final gap measure is numerator divided by denominator
	gen gap=sum_gap/sum_wage	
	qui replace gap=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui egen super_gap=mean(gap), by(cz) 
	gen gap_lg=sum_gap/sum_wage_lg	
	qui replace gap_lg=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui egen super_gap_lg=mean(gap_lg), by(cz) 
	
	* Final wage bill share measure is numerator divided by denominator
	gen wage_bill_share_monthly=sum_wage_bill_share/sum_wage	
	qui replace wage_bill_share_monthly=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui egen wage_bill_share=mean(wage_bill_share_monthly), by(cz) 
	
	* Bring in alternative denominators for employment share
	rename cz czone
	merge 1:1 czone mdate using "$data/qcew/qcew_emp_cz", keep(1 3) nogen keepusing(emp_main_ind emp_all)
	rename czone cz

	* Final employment share measure is numerator divided by denominator
	gen employment_share_monthly=sum_employment_share/sum_emp
	gen employment_share_monthly_q=sum_employment_share/emp_main_ind
	gen employment_share_monthly_q_all=sum_employment_share/emp_all
	qui replace employment_share_monthly=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui replace employment_share_monthly_q=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui replace employment_share_monthly_q_all=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui egen employment_share=mean(employment_share_monthly), by(cz) 
	qui egen employment_share_q=mean(employment_share_monthly_q), by(cz) 
	qui egen employment_share_q_all=mean(employment_share_monthly_q_all), by(cz)
	
	* Average wage is wage bill divided by employment
	gen avg_wage = sum_wage/sum_emp
	gen avg_wage_pol = sum_wage_pol/sum_emp_pol
	gen avg_wage_np = sum_wage_np/sum_emp_np
	gen avg_wage_lg = sum_wage_lg/sum_emp_lg
	gen avg_wage_np_lg = sum_wage_np_lg/sum_emp_np_lg
	
	keep super_gap super_gap_lg wage_bill_share employment_share employment_share_q employment_share_q_all avg_wage avg_wage_lg avg_wage_pol avg_wage_np* sum_emp_np sum_emp_pol sum_emp mdate cz
	merge m:1 cz using `company_gap', nogen keep (3)
	
	xtile gap_decile = T, nq(10)
	xtile super_gap_decile = super_gap, nq(10)
	cap xtile super_gap_lg_decile = super_gap_lg, nq(10)
	xtile wage_bill_share_decile = wage_bill_share, nq(10)
	xtile employment_share_decile = employment_share, nq(10)
	xtile employment_share_q_decile = employment_share_q, nq(10)
	xtile employment_share_q_all_decile = employment_share_q_all, nq(10)
	
	gen trt_exp=`experiment'

	tempfile exp`experiment'
	save `exp`experiment'', replace
	}
	
	* Append
	clear
	foreach experiment in `exp' {
		append using `exp`experiment''
	}
	
	save "$data/cb/cz_month_avg_wage_balanced_estab.dta", replace
	
*-------------------------------------------------------------------------------
*4. Stacked non-policy firm dataset 
*-------------------------------------------------------------------------------	
	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	
	
	* Merge new hires from policy
	use "$data/cb/raw_policy_to_nonpolicy_new_hires.dta", clear
	
	* Collapse across wage bins
	collapse (sum) new_hires = freq , by(cmp_company_code cmp_company_code_previous cz mdate )
	
	reshape wide new_hires,i(cmp_company_code cz mdate) j(cmp_company_code_previous) string
	egen all_new_hires=rowtotal(new_hires*)
	tempfile new_hires_from_policy
	save `new_hires_from_policy'
	
	* Merge in separations to policy
	insheet using "$data/cb/raw/[Project Name]_result1_7_czone_nonpol_pol_hire_date.csv", clear
	
	* Create monthly date and year variables 
	gen str7 str_monthyear = substr(string(wt_), 1, 4) + "-" + substr(string(wt_), 5, 2) + "-01"
	gen mdate = monthly(str_monthyear, "YM")
	format mdate %tm
	gen month=month(dofm(mdate))
	gen year=yofd(dofm(mdate))	
	
	rename cmp_company_code_previous cmp_company_code
	rename czone cz		
	drop wt_original_hire_date_new str_monthyear month year
	rename freq separations_to_policy
	reshape wide separations_to_policy, i(cmp_company_code mdate cz) j(cmp_company_code_new) string
	egen all_separations=rowtotal(separations_to_policy*)
	tempfile separations_to_policy
	save `separations_to_policy', replace			
		
	* Read in events dataset and select events
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6 // Drops the two events associated with company code 66666
	
	* Load experiment details
	levelsof eventid, local(exp)
	foreach experiment in `exp' {	
		
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		clear
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"'
		
		* Load nonpolicy firm data
	
		use "$data/cb/clean_nonpolicy_firm_cz.dta", clear	 // Read in the cleaned non-policy firm data
		
		*keep if insample==1 // MEA EDIT
		egen spell_cmp=group(spell_id cmp_company_code)	// create a spell X company id
		egen estab_id = group(spell_id cmp_company_code cz) // create an etablishment id, consisting of spell X company X cz
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'
		gen etime=mdate-`mdate'-1
		gen mw=`mw'
		
		* Restrict to within a year of event
		local event_start = -12
		local event_end = 11
		keep if inrange(etime, `event_start',`event_end')
		
		*** RESTRICT TO 12-MONTH BALANCED PANEL OF ESTABLISHMENTS
		
		* Keep in-sample establishment-spell pairs that with at least 10 employees each month
		bysort estab_id mdate: egen est_emp = sum(tot_emp)
		foreach num of numlist 1/12 {
			gen e_m`num'_temp = etime == -`num' & est_emp >= 10
			bysort cz cmp spell_id: egen e_m`num' = max(e_m`num'_temp)
		}
		foreach num of numlist 0/11 {
			gen e_`num'_temp = etime == `num' & est_emp >= 10
			bysort cz cmp spell_id: egen e_`num' = max(e_`num'_temp)
		}
		gen balanced_long = min(e_m12, e_m11, e_m10, e_m9, e_m8, e_m7, e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11) // Create indicator for 24-month balanced panel
		gen balanced_short = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5)
		* keep if balanced_short == 1 // MEA EDIT
		drop e_*
		
		* Count CZs per company
		egen cz_unique = tag(cz cmp_company_code)
		egen tot_cz_count=total(cz_unique),by(cmp_company_code)
		
		* Generate outcomes equal to share of workers paid below, at and above minimum wage
		gen share_wage_ltmw=share_wage_lt`mw'
		gen share_wage_mw=wage_`mw'/tot_emp
		gen share_wage_gtmw=share_wage_gt`mw'/tot_emp
		
		* Create gap for non-policy establishments	
		* Specify start and end dates for gap and share measures
		local gap_measure_start=-6
		di `gap_measure_start'
		local gap_measure_end=`gap_measure_start'+3
		di `gap_measure_end'

		* For wage bins below the minimum wage, the gap is the number of workers per bin times the value of the bin, only at the policy company
		forval i=7(1)`bmw'{ 
			local n = `mw'-`i'
			gen gap_`n'=(wage_`i')*`n'
		}

		* For wage bins at or above the minimum wage, the gap is set to 0
		forval i=`mw'(1)29{ 
			gen gap_`i'=0
		}

		* The denominator is the total wage bill - sum of number of workers per bin times the value of the bin
		forval i=7(1)29{ 
			gen sumwage_`i'=wage_`i'*`i'
			gen sumwagelg_`i'=wage_`i'*`i'
			replace sumwagelg_`i'= 0 if balanced_long == 0
		}	

		* Gap numerator is sum of gap per bin
		egen sum_gap=rowtotal(gap_*)

		* Denominator for gap is sum of wage bill per bin (numerator for avg. wage)
		egen sum_wage=rowtotal(sumwage_*)
		egen sum_wage_lg=rowtotal(sumwagelg_*)

		* Final gap measure is numerator divided by denominator
		gen gap=sum_gap/sum_wage	
		qui replace gap=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
		qui egen np_gap=mean(gap), by(cz cmp_company_code) 
		gen gap_lg=sum_gap/sum_wage_lg	
		qui replace gap_lg=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
		qui egen np_gap_lg=mean(gap_lg), by(cz cmp_company_code) 
		
		* Create placebo gap measure	
		* Specify start and end dates for gap measure
		local placebo_gap_start=-12
		di `placebo_gap_start'
		local placebo_gap_end=`placebo_gap_start'+3
		di `placebo_gap_end'

		* Final gap measure is numerator divided by denominator
		gen placebo_gap=sum_gap/sum_wage if inrange(etime,`placebo_gap_start',`placebo_gap_end') 		
		qui egen np_placebo_gap=mean(placebo_gap), by(cz) 			

		* Create longer gap measure	
		* Specify start and end dates for gap measure
		local long_gap_start=-12
		di `long_gap_start'
		local long_gap_end=`long_gap_start'+9
		di `long_gap_end'

		* Final gap measure is numerator divided by denominator
		gen long_gap=sum_gap/sum_wage if inrange(etime,`long_gap_start',`long_gap_end') 		
		qui egen np_long_gap=mean(long_gap), by(cz) 	
		
		drop gap_* sumwage*
	
		* Merge in avg gap calculations for the policy firm
		merge m:1 cz using "$data/cb/all_gap_measures_`experiment'.dta", nogen keepusing(T wage_bill* employment_share* super* avgplacebogap) keep (3) 
		merge m:1 cz using "$data/cb/super_gap_ind_measures_`experiment'.dta", nogen keepusing(super*) 

		* Bring in dataset of flows from non-policy now that we have the final establishment-level dataset
		preserve 
		contract spell_id cmp_company_code cz estab_id mdate // MEA EDIT: ADD MDATE FOR MATCHING
		merge m:1 mdate cmp_company_code cz using `separations_to_policy', keep(1 3) keepusing(all_separations separations_to_policy`cmp_company_code' mdate) 
		replace all_separations=1 if _merge==1
		replace separations_to_policy`cmp_company_code'=0 if _merge==1
		keep if mdate<`mdate'
		collapse (sum) separations_to_policy`cmp_company_code' all_separations, by(spell_id cmp_company_code cz estab_id)
		gen share_sep_policy=separations_to_policy`cmp_company_code'/all_separations
		replace share_sep_policy=1 if share_sep_policy>1 & share_sep_policy!=.
		xtile sep_policy_decile=share_sep_policy,nq(10)
		gen feeder_estab=share_sep_policy>0
		tempfile preperiod_feed_estab
		save `preperiod_feed_estab'

		collapse (sum) separations_to_policy`cmp_company_code' all_separations, by(cmp_company_code)
		tempfile preperiod_feed_firm
		gen share_sep_policy=separations_to_policy`cmp_company_code'/all_separations
		replace share_sep_policy=1 if share_sep_policy>1 & share_sep_policy!=.
		xtile sep_policy_decile=share_sep_policy,nq(10)
		gen feeder_firm=share_sep_policy>0
		keep feeder_firm cmp_company_code
		save `preperiod_feed_firm'		
		restore 
		
		* Bring in dataset of flows to non-policy now that we have the final establishment-level dataset
		preserve 
		contract spell_id cmp_company_code cz estab_id mdate // MEA EDIT: ADD MDATE FOR MATCHING
		merge m:1 cmp_company_code cz mdate using `new_hires_from_policy', keep(1 3) keepusing(all_new_hires new_hires`cmp_company_code' mdate) 
		replace all_new_hires=1 if _merge==1
		replace new_hires`cmp_company_code'=0 if _merge==1
		keep if mdate<`mdate'
		collapse (sum) new_hires`cmp_company_code' all_new_hires, by(spell_id cmp_company_code cz estab_id)
		gen share_hire_policy=new_hires`cmp_company_code'/all_new_hires
		replace share_hire_policy=1 if share_hire_policy>1 & share_hire_policy!=.
		xtile hire_policy_decile=share_hire_policy,nq(10)
		gen poaching_estab=share_hire_policy>0
		tempfile preperiod_poach_estab
		save `preperiod_poach_estab'

		collapse (sum) new_hires`cmp_company_code' all_new_hires, by(cmp_company_code)
		gen share_hire_policy=new_hires`cmp_company_code'/all_new_hires
		replace share_hire_policy=1 if share_hire_policy>1 & share_hire_policy!=.
		xtile hire_policy_decile=share_hire_policy,nq(10)
		gen poaching_firm=share_hire_policy>0
		keep poaching_firm cmp_company_code
		tempfile preperiod_poach_firm
		save `preperiod_poach_firm'		
		restore 
		
		merge m:1 cmp_company_code cz estab_id using `preperiod_feed_estab'
		drop _merge

		merge m:1  cmp_company_code using `preperiod_feed_firm'
		drop _merge

		merge m:1 cmp_company_code cz estab_id using `preperiod_poach_estab'
		drop _merge

		merge m:1  cmp_company_code using `preperiod_poach_firm'
		drop _merge
		
		* Save
		tempfile experiment`experiment'
		save `experiment`experiment''
		
		clear
	}
	
	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
	 	 
	 * Label variables
	 label variable cmp_company_code "Non-policy company code"
	 label variable trt_exp "experiment code"
	 label variable mw "new policy minimum wage"
	 label variable balanced_short "fully balanced -6 to 5 months from policy month"
	 label variable balanced_long "fully balanced -12 to 11 months from policy month"
	 label variable etime "months since event"
	 label variable share_wage_ltmw "share with wages less than minimum wage"
	 label variable share_wage_mw "share with wage in minimum wage bin"
	 label variable share_wage_gtmw "share with wage at least minimum wage"
	 label variable T "average gap"
	 label variable tot_cz_count "number of CZs with any employees at non-policy company ever"
	 label variable share_affected "share affected by policy"
	 label variable months_since_last_policy "months since last policy at company"
	 label variable months_until_next_policy "months until next policy at company"
	
	* Clean
	drop max_emp max_emp_cz low_report months_low z_d_emp_adj d_emp_large max_d_emp months_d_emp_large max_emp_nat small_firm obs_nat spell_start spell_id spell_length max_spell
	 
	 save "$data/cb/stacked_nonpolicy_firm_dataset.dta", replace
	 
* ------------------------------------------------------------------------------
*5. Stacked policy firms dataset (for own effects)
* ------------------------------------------------------------------------------
	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	

	* Load event info
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if months_since_last_policy <= 6
	drop if months_until < 24
	
	levelsof eventid, local(exp)
	* Build panel for each event 
	foreach experiment in `exp' {
		
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy  {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"' 
		
		* Load cz-firm-month level data
		use "$data/cb/clean_policy_firm_cz.dta", clear
		
		* Restrict to company
		keep if cmp==`cmp_company_code'
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen mw=`mw'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'

		* Create event time variable
		gen etime=mdate-`mdate'-1
		 
		* Restrict to months close to event
		local event_start=-24
		local event_end=23
		keep if inrange(etime, `event_start',`event_end')
		
		* Restrict to CZ cells with at least 30 *actual* emp during sample period
		bysort cz: egen min_emp = min(tot_emp)
		drop if min_emp < 30 

		* Generate max emp by CZ
		bysort cz: egen max_emp = max(tot_emp)
		*drop if min_emp < 30 
		
		* NEW: Create indicators for fully balanced
		foreach num of numlist 1/24 {
			gen e_m`num'_temp = etime == -`num'
			by cz: egen e_m`num' = max(e_m`num'_temp)
		}
		foreach num of numlist 0/23 {
			gen e_`num'_temp = etime == `num'
			by cz: egen e_`num' = max(e_`num'_temp)
		}
		gen balanced_6_23 = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11, e_12, e_13, e_14, e_15, e_16, e_17, e_18, e_19, e_20, e_21, e_22, e_23)
		gen balanced_24_23 = min(e_m1, e_m2, e_m3, e_m4, e_m5, e_m6, e_m7, e_m8, e_m9, e_m10, e_m11, e_m12, e_m13, e_m14, e_m15, e_m16, e_m17, e_m18, e_m19, e_m20, e_m21, e_m22, e_m23, e_m24, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11, e_12, e_13, e_14, e_15, e_16, e_17, e_18, e_19, e_20, e_21, e_22, e_23)
		drop e_*

		* Generate outcomes equal to share of emp paid below, at and above minimum wage
		gen share_wage_ltmw=share_wage_lt`mw'
		gen share_wage_mw=share_wage_`mw' // CHANGED TO EXISTING SHARE VAR
		gen share_wage_gtmw=share_wage_gt`mw' // CHANGED TO EXISTING SHARE VAR
		local mw_plus_one=`mw'+1
		local mw_plus_two=`mw'+2
		local mw_plus_three=`mw'+3
		gen share_retain_mw1=share_retain_lt`mw_plus_one'
		gen share_retain_mw2=share_retain_lt`mw_plus_two'
		gen share_retain_mw3=share_retain_lt`mw_plus_three'
		
		* Merge in avg gap calculations for the policy firm
		merge m:1 cz using "$data/cb/all_gap_measures_`experiment'.dta", nogen keepusing(T wage_bill* super* employment_share* avgplacebogap gap_decile) keep (3)

		tempfile experiment`experiment'
		save `experiment`experiment''

	 }
	 
	 clear

	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
	 
	 * Label variables
	 label variable trt_exp "experiment code"
	 label variable mw "new minimum wage"
	 label variable share_affected "share affected by policy"
	 label variable months_since_last_policy "months since last policy at company"
	 label variable months_until_next_policy "months until next policy at company"
	 label variable balanced_6_23 "fully balanced -6 to 23 months from policy month"
	 label variable balanced_24_23 "fully balanced -24 to 23 months from policy month"
	 label variable etime "months since event"
	 label variable share_wage_ltmw "share with wages less than minimum wage"
	 label variable share_wage_mw "share with wage in minimum wage bin"
	 label variable share_wage_gtmw "share with wage at least minimum wage"
	 label variable T "average gap"
	 label variable avgplacebogap "average placebo gap"

	 * Remove unneessary vars
	save "$data/cb/stacked_policy_firm_dataset_long.dta", replace	

*-------------------------------------------------------------------------------
*6. Stacked non-policy firm dataset 
*-------------------------------------------------------------------------------	
	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	
	
	* Merge new hires from policy
	use "$data/cb/raw_policy_to_nonpolicy_new_hires.dta", clear
	
	* Collapse across wage bins
	collapse (sum) new_hires = freq , by(cmp_company_code cmp_company_code_previous cz mdate )
	
	reshape wide new_hires,i(cmp_company_code cz mdate) j(cmp_company_code_previous) string
	egen all_new_hires=rowtotal(new_hires*)
	tempfile new_hires_from_policy
	save `new_hires_from_policy'
	
	* Merge in separations to policy
	insheet using "$data/cb/raw/[Project Name]_result1_7_czone_nonpol_pol_hire_date.csv", clear
	
	* Create monthly date and year variables 
	gen str7 str_monthyear = substr(string(wt_), 1, 4) + "-" + substr(string(wt_), 5, 2) + "-01"
	gen mdate = monthly(str_monthyear, "YM")
	format mdate %tm
	gen month=month(dofm(mdate))
	gen year=yofd(dofm(mdate))	
	
	rename cmp_company_code_previous cmp_company_code
	rename czone cz		
	drop wt_original_hire_date_new str_monthyear month year
	rename freq separations_to_policy
	reshape wide separations_to_policy, i(cmp_company_code mdate cz) j(cmp_company_code_new) string
	egen all_separations=rowtotal(separations_to_policy*)
	tempfile separations_to_policy
	save `separations_to_policy', replace			
		
	* Read in events dataset and select events
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if months_since_last_policy <= 6
	drop if months_until < 24
	
	* Load experiment details
	levelsof eventid, local(exp)
	foreach experiment in `exp' {	

		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		clear
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"'
		
		* Load nonpolicy firm data
	
		use "$data/cb/clean_nonpolicy_firm_cz.dta", clear	 // Read in the cleaned non-policy firm data
		
		*keep if insample==1 // MEA EDIT
		egen spell_cmp=group(spell_id cmp_company_code)	// create a spell X company id
		egen estab_id = group(spell_id cmp_company_code cz) // create an etablishment id, consisting of spell X company X cz
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'
		gen etime=mdate-`mdate'-1
		gen mw=`mw'
		
		* Restrict to within two years of event
		local event_start = -24
		local event_end = 23
		keep if inrange(etime, `event_start',`event_end')
		
		* Keep in-sample establishment-spell pairs that with at least 10 employees each month
		bysort estab_id mdate: egen est_emp = sum(tot_emp)
		foreach num of numlist 1/24 {
			gen e_m`num'_temp = etime == -`num' & est_emp >= 10
			bysort cz cmp spell_id: egen e_m`num' = max(e_m`num'_temp)
		}
		foreach num of numlist 0/23 {
			gen e_`num'_temp = etime == `num' & est_emp >= 10
			by cz cmp spell_id: egen e_`num' = max(e_`num'_temp)
		}
		gen balanced_6_23 = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11, e_12, e_13, e_14, e_15, e_16, e_17, e_18, e_19, e_20, e_21, e_22, e_23)
		gen balanced_24_23 = min(e_m1, e_m2, e_m3, e_m4, e_m5, e_m6, e_m7, e_m8, e_m9, e_m10, e_m11, e_m12, e_m13, e_m14, e_m15, e_m16, e_m17, e_m18, e_m19, e_m20, e_m21, e_m22, e_m23, e_m24, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11, e_12, e_13, e_14, e_15, e_16, e_17, e_18, e_19, e_20, e_21, e_22, e_23)
		drop e_*
		
		* Count CZs per company
		egen cz_unique = tag(cz cmp_company_code)
		egen tot_cz_count=total(cz_unique),by(cmp_company_code)
		
		* Generate outcomes equal to share of workers paid below, at and above minimum wage
		gen share_wage_ltmw=share_wage_lt`mw'
		gen share_wage_mw=wage_`mw'/tot_emp
		gen share_wage_gtmw=share_wage_gt`mw'/tot_emp
		
		* Create gap for non-policy establishments	
		* Specify start and end dates for gap and share measures
		local gap_measure_start=-6
		di `gap_measure_start'
		local gap_measure_end=`gap_measure_start'+3
		di `gap_measure_end'

		* For wage bins below the minimum wage, the gap is the number of workers per bin times the value of the bin, only at the policy company
		forval i=7(1)`bmw'{ 
			local n = `mw'-`i'
			gen gap_`n'=(wage_`i')*`n'
		}

		* For wage bins at or above the minimum wage, the gap is set to 0
		forval i=`mw'(1)29{ 
			gen gap_`i'=0
		}

		* The denominator is the total wage bill - sum of number of workers per bin times the value of the bin
		forval i=7(1)29{ 
			gen sumwage_`i'=wage_`i'*`i'
			gen sumwagelg_`i'=wage_`i'*`i'
			replace sumwagelg_`i'= 0 if balanced_6_23 == 0
		}	

		* Gap numerator is sum of gap per bin
		egen sum_gap=rowtotal(gap_*)

		* Denominator for gap is sum of wage bill per bin (numerator for avg. wage)
		egen sum_wage=rowtotal(sumwage_*)
		egen sum_wage_lg=rowtotal(sumwagelg_*)

		* Final gap measure is numerator divided by denominator
		gen gap=sum_gap/sum_wage	
		qui replace gap=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
		qui egen np_gap=mean(gap), by(cz cmp_company_code) 
		gen gap_lg=sum_gap/sum_wage_lg	
		qui replace gap_lg=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
		qui egen np_gap_lg=mean(gap_lg), by(cz cmp_company_code) 
		
		* Create placebo gap measure	
		* Specify start and end dates for gap measure
		local placebo_gap_start=-12
		di `placebo_gap_start'
		local placebo_gap_end=`placebo_gap_start'+3
		di `placebo_gap_end'

		* Final gap measure is numerator divided by denominator
		gen placebo_gap=sum_gap/sum_wage if inrange(etime,`placebo_gap_start',`placebo_gap_end') 		
		qui egen np_placebo_gap=mean(placebo_gap), by(cz) 			

		drop gap_* sumwage*
	
		* Merge in avg gap calculations for the policy firm
*		merge m:1 cz mdate trt_exp using "$data/cb/cz_month_avg_wage_balanced_estab.dta", nogen keepusing(T wage_bill* employment_share* super* avgplacebogap) keep (3)

		* Merge in avg gap calculations for the policy firm
		merge m:1 cz using "$data/cb/all_gap_measures_`experiment'.dta", nogen keepusing(T wage_bill* employment_share* super* avgplacebogap) keep (3)

		* Bring in dataset of flows from non-policy now that we have the final establishment-level dataset
		preserve 
		contract spell_id cmp_company_code cz estab_id mdate // MEA EDIT: ADD MDATE FOR MATCHING
		merge m:1 mdate cmp_company_code cz using `separations_to_policy', keep(1 3) keepusing(all_separations separations_to_policy`cmp_company_code' mdate) 
		replace all_separations=1 if _merge==1
		replace separations_to_policy`cmp_company_code'=0 if _merge==1
		keep if mdate<`mdate'
		collapse (sum) separations_to_policy`cmp_company_code' all_separations, by(spell_id cmp_company_code cz estab_id)
		gen share_sep_policy=separations_to_policy`cmp_company_code'/all_separations
		replace share_sep_policy=1 if share_sep_policy>1 & share_sep_policy!=.
		xtile sep_policy_decile=share_sep_policy,nq(10)
		gen feeder_estab=share_sep_policy>0
		tempfile preperiod_feed_estab
		save `preperiod_feed_estab'

		collapse (sum) separations_to_policy`cmp_company_code' all_separations, by(cmp_company_code)
		tempfile preperiod_feed_firm
		gen share_sep_policy=separations_to_policy`cmp_company_code'/all_separations
		replace share_sep_policy=1 if share_sep_policy>1 & share_sep_policy!=.
		xtile sep_policy_decile=share_sep_policy,nq(10)
		gen feeder_firm=share_sep_policy>0
		keep feeder_firm cmp_company_code
		save `preperiod_feed_firm'		
		restore 
		
		* Bring in dataset of flows to non-policy now that we have the final establishment-level dataset
		preserve 
		contract spell_id cmp_company_code cz estab_id mdate // MEA EDIT: ADD MDATE FOR MATCHING
		merge m:1 cmp_company_code cz mdate using `new_hires_from_policy', keep(1 3) keepusing(all_new_hires new_hires`cmp_company_code' mdate) 
		replace all_new_hires=1 if _merge==1
		replace new_hires`cmp_company_code'=0 if _merge==1
		keep if mdate<`mdate'
		collapse (sum) new_hires`cmp_company_code' all_new_hires, by(spell_id cmp_company_code cz estab_id)
		gen share_hire_policy=new_hires`cmp_company_code'/all_new_hires
		replace share_hire_policy=1 if share_hire_policy>1 & share_hire_policy!=.
		xtile hire_policy_decile=share_hire_policy,nq(10)
		gen poaching_estab=share_hire_policy>0
		tempfile preperiod_poach_estab
		save `preperiod_poach_estab'

		collapse (sum) new_hires`cmp_company_code' all_new_hires, by(cmp_company_code)
		gen share_hire_policy=new_hires`cmp_company_code'/all_new_hires
		replace share_hire_policy=1 if share_hire_policy>1 & share_hire_policy!=.
		xtile hire_policy_decile=share_hire_policy,nq(10)
		gen poaching_firm=share_hire_policy>0
		keep poaching_firm cmp_company_code
		tempfile preperiod_poach_firm
		save `preperiod_poach_firm'		
		restore 
		
		merge m:1 cmp_company_code cz estab_id using `preperiod_feed_estab'
		drop _merge

		merge m:1  cmp_company_code using `preperiod_feed_firm'
		drop _merge

		merge m:1 cmp_company_code cz estab_id using `preperiod_poach_estab'
		drop _merge

		merge m:1  cmp_company_code using `preperiod_poach_firm'
		drop _merge
		
		*sample 10, by(cmp_company_code cz)
		* Save
		tempfile experiment`experiment'
		save `experiment`experiment''
		
		clear
	}
	
	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
	 	 
	 * Label variables
	 label variable cmp_company_code "Non-policy company code"
	 label variable trt_exp "experiment code"
	 label variable mw "new policy minimum wage"
	 label variable balanced_6_23 "fully balanced -6 to 5 months from policy month"
	 label variable balanced__23 "fully balanced -12 to 11 months from policy month"
	 label variable etime "months since event"
	 label variable share_wage_ltmw "share with wages less than minimum wage"
	 label variable share_wage_mw "share with wage in minimum wage bin"
	 label variable share_wage_gtmw "share with wage at least minimum wage"
	 label variable T "average gap"
	 label variable tot_cz_count "number of CZs with any employees at non-policy company ever"
	 *label variable single_cz "nonpolicy employees only ever in one CZ"
	 label variable share_affected "share affected by policy"
	 label variable months_since_last_policy "months since last policy at company"
	 label variable months_until_next_policy "months until next policy at company"
	
	* Clean
*	keep cmp_company_code cz state* no_state_mw er_naics mdate month year tot_emp_unadj tot_emp avg_wage_bins* ln_avg_wage_bins* ln_tot_emp ln_tot_emp_7_29 mw T trt_exp mw share_affected months_since months_until etime balanced* tot_cz_count single_cz share_wage_ltmw share_wage_mw share_wage_gtmw super* wage_bill* employment_share* *new_hires* *separations* share* poach* feed* *decile*

	drop max_emp max_emp_cz low_report months_low z_d_emp_adj d_emp_large max_d_emp months_d_emp_large max_emp_nat small_firm obs_nat spell_start spell_id spell_length max_spell
	 
	 save "$data/cb/stacked_nonpolicy_firm_dataset_long.dta", replace
	 
*-------------------------------------------------------------------------------
*7. Stacked non-policy firm-level dataset 
*-------------------------------------------------------------------------------	
* Store tempfile of policy firm national level data
	use "$data/cb/clean_policy_firm_national.dta", clear
	drop *_lt* *_gt*
	keep cmp_company_code mdate archive wage_* tot_emp

	* Greater than and less than bins
			* Start with lowest bin
			gen share_wage_7 = wage_7/tot_emp
			gen share_wage_30= wage_30/tot_emp

			* Do the remaining bins
			foreach num of numlist 8/29 {
				
				* Create shares
				foreach var of varlist wage_`num'  {
					gen share_`var' = `var'/tot_emp
					}
				
			}
			
	rename share_wage_* policy_share_wage_*
	rename tot_emp policy_tot_emp
	rename cmp_company_code policy_cmp_company_code
	keep policy_cmp_company_code mdate archive policy_share_wage_* 
	tempfile policy_firm_distribution
	save `policy_firm_distribution'

* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	
	
	* Merge new hires from policy
	use "$data/cb/raw_policy_to_nonpolicy_new_hires.dta", clear
	
	* Collapse across wage bins
	collapse (sum) new_hires = freq , by(cmp_company_code cmp_company_code_previous cz mdate )
	
	reshape wide new_hires,i(cmp_company_code cz mdate) j(cmp_company_code_previous) string
	egen all_new_hires=rowtotal(new_hires*)
	tempfile new_hires_from_policy
	save `new_hires_from_policy'
	
	* Merge in separations to policy
	insheet using "$data/cb/raw/[Project Name]_result1_7_czone_nonpol_pol_hire_date.csv", clear
	
	* Create monthly date and year variables 
	gen str7 str_monthyear = substr(string(wt_), 1, 4) + "-" + substr(string(wt_), 5, 2) + "-01"
	gen mdate = monthly(str_monthyear, "YM")
	format mdate %tm
	gen month=month(dofm(mdate))
	gen year=yofd(dofm(mdate))	
	
	rename cmp_company_code_previous cmp_company_code
	rename czone cz		
	drop wt_original_hire_date_new str_monthyear month year
	rename freq separations_to_policy
	reshape wide separations_to_policy, i(cmp_company_code mdate cz) j(cmp_company_code_new) string
	egen all_separations=rowtotal(separations_to_policy*)
	tempfile separations_to_policy
	save `separations_to_policy', replace			
		
	* Read in events dataset and select events
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6 // Drops the two events associated with company code 66666
	
	* Load experiment details
	levelsof eventid, local(exp)
	foreach experiment in `exp' {	
		
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		clear
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"'
		
		* Load nonpolicy firm data
	
		use "$data/cb/stacked_nonpolicy_firm_dataset.dta", clear
		
		keep if trt_exp==`experiment'
		gen policy_cmp_company_code = `cmp_company_code'

		* Wage variable
		gen wt_wage=ln_avg_wage_bins*tot_emp 
		
		* Exact wage variable
		gen wt_wage_exact=ln_avg_wage_exact*tot_emp 

		* Gap
		gen wt_T=T*tot_emp
		
		* Non-policy gap
		gen wt_np_gap=np_gap*tot_emp
		
		* Super gap
		gen wt_super_gap=super_gap*tot_emp
		
		collapse (sum) wt_wage wt_wage_exact wt_T wt_np_gap wt_super_gap tot_emp wage_7 wage_8 wage_9 wage_10 wage_11 wage_12 wage_13 wage_14 wage_15 wage_16 wage_17 wage_18 wage_19 wage_20 wage_21 wage_22 wage_23 wage_24 wage_25 wage_26 wage_27 wage_28 wage_29 wage_30 , by( policy_cmp_company_code cmp_company_code mdate)
		
		gen ln_avg_wage_bins=wt_wage/tot_emp
		gen ln_avg_wage_exact=wt_wage_exact/tot_emp		
		gen T=wt_T/tot_emp
		gen np_gap=wt_np_gap/tot_emp
		gen super_gap=wt_super_gap/tot_emp
		
		* Greater than and less than bins
		* Start with lowest bin
		gen share_wage_7 = wage_7/tot_emp
		gen share_wage_30= wage_30/tot_emp

		* Do the remaining bins
		foreach num of numlist 8/29 {
			
			* Create shares
			foreach var of varlist wage_`num'  {
				gen share_`var' = `var'/tot_emp
				}
			
		}
		
		* Bring in dataset of flows from non-policy now that we have the final establishment-level dataset
		preserve 
		merge 1:m cmp_company_code mdate using `separations_to_policy', keep(1 3) keepusing(all_separations separations_to_policy`cmp_company_code' mdate) 
		replace all_separations=1 if _merge==1
		replace separations_to_policy`cmp_company_code'=0 if _merge==1
		keep if mdate<`mdate'
		collapse (sum) separations_to_policy`cmp_company_code' all_separations, by(cmp_company_code)
		tempfile preperiod_feed_firm
		gen share_sep_policy=separations_to_policy`cmp_company_code'/all_separations
		replace share_sep_policy=1 if share_sep_policy>1 & share_sep_policy!=.
		xtile sep_policy_decile=share_sep_policy,nq(10)
		gen feeder_firm=share_sep_policy>0
		keep feeder_firm cmp_company_code
		save `preperiod_feed_firm'		
		restore 
		
		* Bring in dataset of flows to non-policy now that we have the final establishment-level dataset
		preserve 
		merge 1:m cmp_company_code mdate using `new_hires_from_policy', keep(1 3) keepusing(all_new_hires new_hires`cmp_company_code' mdate) 
		replace all_new_hires=1 if _merge==1
		replace new_hires`cmp_company_code'=0 if _merge==1
		keep if mdate<`mdate'
		collapse (sum) new_hires`cmp_company_code' all_new_hires, by(cmp_company_code)
		gen share_hire_policy=new_hires`cmp_company_code'/all_new_hires
		replace share_hire_policy=1 if share_hire_policy>1 & share_hire_policy!=.
		xtile hire_policy_decile=share_hire_policy,nq(10)
		gen poaching_firm=share_hire_policy>0
		keep poaching_firm cmp_company_code
		tempfile preperiod_poach_firm
		save `preperiod_poach_firm'		
		restore 
		
		merge m:1  cmp_company_code using `preperiod_feed_firm'
		drop _merge

		merge m:1  cmp_company_code using `preperiod_poach_firm'
		drop _merge
		
		merge m:1 policy_cmp_company_code mdate using `policy_firm_distribution',  keep(3)
		drop _merge
		
		* Fill in experiment parameters
		gen trt_exp= `experiment'
		gen share_affected=`share_affected'
		gen months_since_last_policy=`months_since_last_policy'
		gen months_until_next_policy=`months_until_next_policy'
		gen etime=mdate-`mdate'-1
		gen mw=`mw'
		
		* Generate distributional distance variable
		foreach num of numlist 7/30 {
			gen dif_wage_bin_`num' = abs(share_wage_`num'-policy_share_wage_`num')
		}
		egen ddist_wage_bin=rowtotal(dif_wage_bin_*)
		gen ddist=0.5*ddist_wage_bin
		replace ddist=. if etime>=0
		egen mean_ddist_pre=mean(ddist),by(cmp_company_code)


		* Create balance indicators
		foreach num of numlist 1/12 {
			gen e_m`num'_temp = etime == -`num'
			bysort cmp_company_code : egen e_m`num' = max(e_m`num'_temp)
		}
		foreach num of numlist 0/11 {
			gen e_`num'_temp = etime == `num'
			bysort cmp_company_code : egen e_`num' = max(e_`num'_temp)
		}
		gen balanced_short = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5)
		gen balanced_long = min(e_m12, e_m11, e_m10, e_m9, e_m8, e_m7, e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11)
		drop e_*
		
		* Save
		tempfile experiment`experiment'
		save `experiment`experiment''
		
		clear
	}
	
	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
		 
	 save "$data/cb/stacked_nonpolicy_firm_level_dataset.dta", replace	
	 
*-------------------------------------------------------------------------------
*8. Stacked CZ-level poaching from policy firms dataset 
*-------------------------------------------------------------------------------	

	* Store tempfile of archive to mdate crosswalk
	insheet using "$data/cb/documentation/archive_mdate_crosswalk.csv", clear
	tempfile archive_mdate_crosswalk
	save `archive_mdate_crosswalk'	

	* Load event info
	use "$data/cb/events.dta", clear
	
	* Drop overlapping events
	drop if min(months_since_last_policy, months_until_next_policy) <= 6
	
	levelsof eventid, local(exp)	
	foreach experiment in `exp' {
		* Keep relevant row
		use in `experiment' using "$data/cb/events.dta", clear
		
		* Identify date
		merge 1:1 archive using `archive_mdate_crosswalk'
		
		* Store relevant parameters
		foreach vr in cmp_company_code mw share_affected archive mdate months_since_last_policy months_until_next_policy {
			local `vr'=`vr'[1]
		}
		local bmw=`mw'-1
		local amw=`mw'-1
		
		noi di in green "Experiment `experiment': " in yellow `"`cmp_company_code' (`mw') `mdate'"' 
		
	* Store tempfile of archive to mdate crosswalk
	use "$data/cb/raw_policy_to_nonpolicy_new_hires.dta", clear
	*keep if sample_ind==1

	collapse (sum) freq,by(mdate cmp_company_code cmp*previous cz)	

	reshape wide freq, i(mdate cmp_company_code cz) j(cmp_company_code_previous) string
	
	foreach var of varlist freq*{
		replace `var'=0 if `var'==.
	}
	egen all_sep=rowtotal(freq*)
	
	egen any_policy_hire=sum(freq`cmp_company_code'),by(cmp_company_code)
	drop if any_policy_hire==0
	collapse (sum) all_sep freq*,by(mdate cz)	
	
	gen share_from_policy=freq`cmp_company_code'/all_sep
	gen share_from_other=freqother/all_sep
	gen ln_new_hires=log(all_sep)
	gen ln_new_hires_policy=log(freq`cmp_company_code')
	gen ln_new_hires_other=log(freqother)
	
	* Fill in experiment parameters
	gen trt_exp= `experiment'
	gen mw=`mw'
	gen share_affected=`share_affected'
	gen months_since_last_policy=`months_since_last_policy'
	gen months_until_next_policy=`months_until_next_policy'

	* Create event time variable
	gen etime=mdate-`mdate'-1
	 
	* Restrict to months close to event
	local event_start=-12
	local event_end=11
	keep if inrange(etime, `event_start',`event_end')
	
	merge m:1 cz using "$data/cb/all_gap_measures_`experiment'.dta", nogen keepusing(T wage_bill* employment_share* super* avgplacebogap) keep (3)
	merge 1:1 cz mdate using "$data/cb/clean_nonpolicy_cz.dta", nogen keep (3)

	* Restrict to within a year of event
	local event_start = -12
	local event_end = 11
	keep if inrange(etime, `event_start',`event_end')
	
	* Create balance indicators
	foreach num of numlist 1/12 {
		gen e_m`num'_temp = etime == -`num'
		bysort cz : egen e_m`num' = max(e_m`num'_temp)
	}
	foreach num of numlist 0/11 {
		gen e_`num'_temp = etime == `num'
		bysort cz : egen e_`num' = max(e_`num'_temp)
	}
	gen balanced_short = min(e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5)
	gen balanced_long = min(e_m12, e_m11, e_m10, e_m9, e_m8, e_m7, e_m6, e_m5, e_m4, e_m3, e_m2, e_m1, e_0, e_1, e_2, e_3, e_4, e_5, e_6, e_7, e_8, e_9, e_10, e_11)
	drop e_*
	
	* Save
	tempfile experiment`experiment'
	save `experiment`experiment''
	
	clear
	}
	
	 foreach experiment in `exp'{
		 append using `experiment`experiment''
	 }
	 
	 save "$data/cb/stacked_poaching.dta", replace 	

*-------------------------------------------------------------------------------
*9. Stacked non-policy firm employment share dataset 
*-------------------------------------------------------------------------------	
	 
	* Load nonpolicy firm data
	use using "$data/cb/stacked_nonpolicy_firm_dataset.dta" if balanced_short==1, clear

	drop if cmp==13481	

	drop if trt_exp==4
	keep if inrange(etime, -6, 5)

	* Clean before merge
	rename tot_emp tot_emp_other
	
	keep tot_emp_other mdate cz etime trt_exp cmp_company_code poaching* feeder* sample_ind estab_id T share_affected mw balanced_* months*

	* Merge with policy firm data
	merge m:1 cz etime trt_exp using "$data/cb/stacked_policy_firm_dataset.dta", nogen keep (3) keepusing( tot_emp)
	
	* Bring in alternative denominators for employment share
	rename cz czone
	merge m:1 czone mdate using "$data/qcew/qcew_emp_cz", keep(3) nogen keepusing(emp_main_ind emp_all)
	rename czone cz	
	
	egen tot_emp_all_other=sum(tot_emp_other),by(cz mdate trt_exp etime)
	
	gen tot_emp_all=tot_emp_all_other+tot_emp
	
	* Final employment share measure is numerator divided by denominator
	gen np_emp_share_monthly=tot_emp_other/tot_emp_all
	gen np_emp_share_monthly_q=tot_emp_other/emp_main_ind
	gen np_emp_share_monthly_q_all=tot_emp_other/emp_all

	* Create super gap and wage bill share measure	
	* Specify start and end dates for gap and share measures
	local gap_measure_start=-6
	di `gap_measure_start'
	local gap_measure_end=`gap_measure_start'+3
	di `gap_measure_end'	
	qui replace np_emp_share_monthly=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui replace np_emp_share_monthly_q=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui replace np_emp_share_monthly_q_all=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui egen np_emp_share=mean(np_emp_share_monthly), by(cz) 
	qui egen np_emp_share_q=mean(np_emp_share_monthly_q), by(cz) 
	qui egen np_emp_share_q_all=mean(np_emp_share_monthly_q_all), by(cz) 
	
	save "$data/cb/stacked_nonpolicy_employment_share.dta", replace
	
*-------------------------------------------------------------------------------
*10. Stacked non-policy firm wage bill share dataset 
*-------------------------------------------------------------------------------	 
	* Load nonpolicy firm data
	use using "$data/cb/stacked_nonpolicy_firm_dataset.dta" if balanced_short==1, clear

	drop if cmp==13481	

	drop if trt_exp==4
	keep if inrange(etime, -6, 5)

	* Clean before merge
	rename tot_emp tot_emp_other
	rename avg_wage_exact avg_wage_exact_other
	
	keep tot_emp_other avg_wage_exact_other mdate cz etime trt_exp cmp_company_code poaching* feeder* sample_ind estab_id T share_affected mw balanced_* months*

	* Merge with policy firm data
	merge m:1 cz etime trt_exp using "$data/cb/stacked_policy_firm_dataset.dta", nogen keep (3) keepusing(avg_wage_exact tot_emp)
	
	* Bring in alternative denominators for employment share
	rename cz czone
	merge m:1 czone mdate using "$data/qcew/qcew_emp_cz", keep(3) nogen keepusing(emp_main_ind emp_all)
	rename czone cz	
	
	gen tot_wage_bill_other=tot_emp_other * avg_wage_exact_other
	gen tot_wage_bill = tot_emp * avg_wage_exact
	
	egen tot_wage_bill_all_other=sum(tot_wage_bill_other),by(cz mdate trt_exp etime)
		
	gen tot_wage_bill_all=tot_wage_bill_all_other+tot_wage_bill
	
	* Final employment share measure is numerator divided by denominator
	gen np_wage_bill_share_monthly=tot_wage_bill_other/tot_wage_bill_all

	* Create super gap and wage bill share measure	
	* Specify start and end dates for gap and share measures
	local gap_measure_start=-6
	di `gap_measure_start'
	local gap_measure_end=`gap_measure_start'+3
	di `gap_measure_end'	
	qui replace np_wage_bill_share_monthly=. if !inrange(etime,`gap_measure_start',`gap_measure_end') 	
	qui egen np_wage_bill_share=mean(np_wage_bill_share_monthly), by(cz) 
	
	save "$data/cb/stacked_nonpolicy_wage_bill_share.dta", replace	
	
*-------------------------------------------------------------------------------
*11. Wage gaps between policy and nonpolicy establishments at the CZ level
*-------------------------------------------------------------------------------	
		
* Get NAICS codes for companies
	insheet using  "$data/cb/raw/[Project Name]_result1_8_ernaics.csv", clear
	contract cmp_company_code er_naics
	quietly bysort cmp_company_code:  gen dup = cond(_N==1,0,_n)
	tab dup
	drop if dup>1
	tempfile cmp_naics
	save `cmp_naics'
	
* Load firm-level nonpolicy Credit Bureau data
use cmp_company_code poaching_firm feeder_firm mdate trt_exp etime cz tot_emp avg_wage_exact tot_emp insample balanced_short if insample == 1 & balanced_short == 1 using "$data/cb/stacked_nonpolicy_firm_dataset.dta", clear

	merge m:1 cmp_company_code using `cmp_naics' , keepusing(er_naics) 
	drop if _merge==2
	drop _merge
	
	gen policy_ind=0
	replace policy_ind=1 if er_naics==452 & (inrange(trt_exp,1,10) | inrange(trt_exp,17,22))
	replace policy_ind=1 if er_naics==454 & trt_exp==23
	replace policy_ind=1 if er_naics==722 & inrange(trt_exp,11,14)
	
	gen feeder_ind= inlist(er_naics,561,452,722,445,448,492,454,444,451)
	gen poaching_ind = inlist(er_naics,561,722,445,452,448,492,444,622,541,451,441,611,453,311)
	gen sample_ind=(feeder_ind ==1 | poaching_ind==1)

* Generate averages for groups of interest across all firms within CZ
	gen avg_wage_pos_flows = avg_wage_exact if (poaching_firm == 1 | feeder_firm == 1)
	gen avg_wage_poach = avg_wage_exact if poaching_firm == 1
	gen avg_wage_feed = avg_wage_exact if feeder_firm == 1
	gen avg_wage_sample_ind = avg_wage_exact if sample_ind == 1
	gen avg_wage_pol_ind = avg_wage_exact if policy_ind == 1
	gen avg_wage_other = avg_wage_exact if feeder_firm != 1 & poaching_firm != 1
	
	gen tot_emp_poach = tot_emp if poaching_firm == 1
	gen tot_emp_feed = tot_emp if feeder_firm == 1
	gen tot_emp_other = tot_emp if feeder_firm != 1 & poaching_firm != 1
	
	collapse (rawsum) tot_emp tot_emp_feed tot_emp_poach tot_emp_other (mean) avg_wage_exact avg_wage_pos_flows avg_wage_poach avg_wage_feed avg_wage_sample_ind avg_wage_pol_ind avg_wage_other [aw = tot_emp], by(mdate etime cz trt_exp)
	rename avg_wage_exact avg_wage_nonpol
	rename tot_emp tot_emp_nonpol
	
* Merge with policy Credit Bureau data
	preserve
		use mdate cz trt_exp avg_wage_exact tot_emp mw etime share_affected using "$data/cb/stacked_policy_firm_dataset.dta", clear
		rename avg_wage_exact avg_wage_policy
		rename tot_emp tot_emp_policy
		tempfile policy
		save `policy'
	restore
	merge m:1 mdate cz trt_exp using `policy', nogen

	gen year = yofd(dofm(mdate))

	drop if trt_exp==15 | trt_exp==16 | trt_exp==4
	
	gen ln_avg_wage_policy=log(avg_wage_policy)
	
	foreach var in avg_wage_nonpol avg_wage_pos_flows avg_wage_poach avg_wage_feed avg_wage_sample_ind avg_wage_pol_ind avg_wage_other {
		gen ln_`var'=log(`var')
	}
	
	foreach group in nonpol pos_flows feed poach other sample_ind pol_ind {
		gen gap_pol_`group' = ln_avg_wage_`group'-ln_avg_wage_policy		
		replace gap_pol_`group' = . if etime>=0
		egen mean_gap_pol_`group'=mean(gap_pol_`group'),by(cz trt_exp)
		replace gap_pol_`group'=mean_gap_pol_`group'
	}

	contract cz trt_exp mw share_affected gap_pol_nonpol gap_pol_pos_flows gap_pol_feed gap_pol_poach gap_pol_other gap_pol_sample_ind gap_pol_pol_ind

	quietly bysort cz trt_exp:  gen dup = cond(_N==1,0,_n)
	keep if dup<=1
	
	keep cz trt_exp mw share_affected gap_pol_nonpol gap_pol_pos_flows gap_pol_feed gap_pol_poach gap_pol_other gap_pol_sample_ind gap_pol_pol_ind
	
	foreach group in nonpol pos_flows feed poach other sample_ind pol_ind {
		xtile gap_pol_`group'_d = gap_pol_`group', nq(10) 
	}
	
	save "$data/cb/stacked_wage_gap_pol_non_pol_cz.dta", replace
		 
